#Topic 8 review and topic 9 # # Here is a new linear regression problem. # First we will generate the data. source("../gnrnd4.R") gnrnd4(6546851306, 3110080302, 18000070) # look at the data L1 L2 # get a plot of the data plot( L1, L2 ) # we can add the axis lines abline(h=0,v=0) # # Then we want to find: # a) the correlation coefficient # b) the regression equation # c) plot the regression equation # d) find all of the expected values # e) plot the expected values # g) the residual value associated with # (9.5, 16.8) # h) compute all of the residual values and # display them # i) pull all of the residual values from the # model and display them # j) use the linear model to predict the y-value # when x=3.7, is that an interpolation or # extrapolation? # k) use the linear model to predict the y-value # when x=13.7, is that an interpolation or # extrapolation? # l) get a plot of the x-values and the residuals # find the correlation coefficient cor( L1, L2) # find the the regression equation our_model <- lm( L2 ~ L1 ) # use tilde not minus sign our_model # so our equation is y = 4.028 + 1.460*x # to plot this, because we saved the model we use abline( our_model ) # # find all of the expected values # To do this we just apply the equation expected <- 4.028 + 1.460*L1 # display the expected values expected # plot the expected values points( L1, expected, col="red", pch=0) # find the residual value associated with # (9.5, 16.8) # We know the observed, it is 16.8 # we could easily recompute the expected this_expect <- 4.028 + 1.460*9.5 this_expect # then the residual is just observed - expected 16.8 - this_expect # compute and display all of the residual values all_resids <- L2 - expected all_resids # pull out the residuals from the model and display them residuals( our_model ) # plot the x values versus the residuals plot( L1, residuals( our_model) ) #use the linear model to predict the y-value # when x=3.7, is that an interpolation or # extrapolation? # Do this twice, first with the coefficients # as we have them. 4.028 + 1.460*3.7 # Then do this again but with more significant # digits by pulling the coefficients from the model. our_coeffs <- coefficients( our_model ) our_coeffs our_coeffs[1] + our_coeffs[2]*3.7 # to see if this is an interpolation or an # extrapolation we need to see if 3.7 is within # the range of L1. Just looking at the plot tells # us that 3.7 is among the known values so this is # an interpolation. #use the linear model to predict the y-value # when x=13.7, is that an interpolation or # extrapolation? our_coeffs[1] + our_coeffs[2]*13.7 # Again, from the plot we see that 13.7 is outside # of the range of known x values so this is an # extrapolation. # # get a plot of the x-values and the residuals plot( L1, all_resids) # Now do the same for this new set of data gnrnd4(6823091306, 4160080302, 18000070) L1 L2 plot(L1,L2) abline( h=seq(-5,5,5), v=seq(-2,8,2), lty="dotted", col="blue") abline( h=0,v=0, col="darkblue", lwd=3) # correlation cor(L1,L2) # our linear model our_model <- lm(L2~L1) our_model # so our equation is y = 3.781 - 1.502*x # or get better coefficients our_coeffs <- coefficients( our_model ) our_coeffs # so our equation is y = 3.781163 - 1.501736*x # plot the regression line abline( our_model, col="red", lwd=3 ) # find the expected value when x=2.73 our_coeffs[1] + our_coeffs[2]*2.73 # note the plus sign # let us plot that point points(2.73, -0.3185765, pch=0, col="blue") # and we note that this is an interpolation # find the expected value when x=-8.34 our_coeffs[1] + our_coeffs[2]*(-8.34) # let us plot that point points(-8.34, 16.30564, pch=0, col="blue") # this did not plot because it is off the graph. # and we note that this is an extrapolation ################################ # here is a different approach to a demonstration # first I will define a function that will # produce y values from x values demo <- function( x) { (x-3)*x*(x+3)/8+2} # then I will test this out demo( 3) demo(-2) # Then we will get some x values gnrnd4(6200451401, 4000020) L1 x <- L1 # and we will use the function to get our y values y <- demo( L1 ) y # now plot the points plot(x,y) # get the correlation coefficient cor( x, y ) # get the linear regression model demo_model <- lm( y~x ) demo_model #draw the regression line abline( demo_model ) # use the model to predict the y value when x=0.2 2.0366+(-0.8146)*(0.2) # graph that point points( 0.2, 1.87368, pch=1, col="red") # see how close that is to the output of the function demo(0.2) # graph that point points( 0.2, 1.776, pch=17, col="blue") # our interpolation worked pretty well, as expected # Now try an extrapolation, say when x=4 # what does our model predict 2.0366+(-0.8146)*(4) # what does our function produce demo( 4 ) # vastly different # a quick aside, look at the distribution of the # residuals demo_res <- residuals( demo_model ) plot( x, demo_res ) # that plot demonstrates that we have a problem here # let us see what our function produces between -5 and 5 x <- seq( -5, 5, 0.01) y <- demo(x) plot( x,y, type="l") abline(h=seq(-5,10,2.5), v=seq(-4,4,1), col="darkgray", lty="dotted") abline(h=0,v=0) abline( demo_model ) #